- Example of scan in Studynet/Canvas
- Questions?
12/12/2019
# load packages and data
library(tidyverse)
data("swiss")
# compute summary statistics:
# mean of Education, mean of Fertility, and the number of rows
swiss_summary <- swiss %>%
  summarise(
    avg_education = mean(Education, na.rm = TRUE),
    avg_fertility = mean(Fertility, na.rm = TRUE),
    N = n()
  )

# print the resulting one-row summary
swiss_summary
##   avg_education avg_fertility  N
## 1      10.97872      70.14255 47
Problems?
# round every column of the summary to two decimal places
swiss_summary_rounded <- round(swiss_summary, 2)
# print the rounded summary
swiss_summary_rounded
##   avg_education avg_fertility  N
## 1         10.98         70.14 47
# format()-function
# format() example
# render the rounded summary with a comma as the decimal mark
swiss_form <- format(swiss_summary_rounded, decimal.mark = ",")
# print the formatted summary
swiss_form
##   avg_education avg_fertility  N
## 1         10,98         70,14 47
ggplot2)Three main approaches:
- graphics package (R Core Team 2018; shipped with the base R installation).
Three main approaches:
- graphics package (R Core Team 2018; shipped with the base R installation).
- lattice package (Sarkar 2008), an implementation of the original Bell Labs 'Trellis' system.
Three main approaches:
- graphics package (R Core Team 2018; shipped with the base R installation).
- lattice package (Sarkar 2008), an implementation of the original Bell Labs 'Trellis' system.
- ggplot2 package (Wickham 2016), an implementation of Leland Wilkinson's 'Grammar of Graphics'.
ggplot2
ggplot2 basics
Using ggplot2 to generate a basic plot in R is quite simple. Basically, it involves three key points:
- data.frame/tibble (in tidy format!).
ggplot2 basics
Using ggplot2 to generate a basic plot in R is quite simple. Basically, it involves three key points:
- data.frame/tibble (in tidy format!).
- ggplot().
ggplot2 basics
Using ggplot2 to generate a basic plot in R is quite simple. Basically, it involves three key points:
- data.frame/tibble (in tidy format!).
- ggplot().
ggplot2 basics
Using ggplot2 to generate a basic plot in R is quite simple. Basically, it involves three key points:
- data.frame/tibble (in tidy format!).
- ggplot().
- ggplot(data = my_dataframe, aes(x = xvar, y = yvar))
# swiss
# load the R package
library(tidyverse) # automatically loads ggplot2
# load the data
data(swiss)
# get details about the data set
# ?swiss
# inspect the data (first rows only)
head(swiss)
##              Fertility Agriculture Examination Education Catholic Infant.Mortality
## Courtelary        80.2        17.0          15        12     9.96             22.2
## Delemont          83.1        45.1           6         9    84.84             22.2
## Franches-Mnt      92.5        39.7           5         5    93.40             20.2
## Moutier           85.8        36.5          12         7    33.77             20.3
## Neuveville        76.9        43.5          17        15     5.16             20.6
## Porrentruy        76.1        35.3           9         7    90.57             26.6
Code a province as 'Catholic' if more than 50% of its inhabitants are Catholic:
# tidyverse route: derive Religion from the Catholic column with mutate();
# values above 50 are labelled "Catholic", everything else "Protestant"
swiss <- swiss %>%
  mutate(Religion = ifelse(Catholic > 50, "Catholic", "Protestant"))

# base R ("old school") route producing the same column:
# start with "Protestant" everywhere, then overwrite rows where Catholic > 50
swiss[["Religion"]] <- "Protestant"
swiss[["Religion"]][swiss[["Catholic"]] > 50] <- "Catholic"

# store the labels as a factor (used for faceting in the plots that follow)
swiss[["Religion"]] <- factor(swiss[["Religion"]])
# step 1: data and aesthetic mapping only -- no geometry layer is added yet
ggplot(swiss, aes(x = Education, y = Examination))

# step 2: add a point layer (scatter plot of Examination against Education)
ggplot(swiss, aes(x = Education, y = Examination)) +
  geom_point()

# step 3: same scatter plot, faceted by Religion
ggplot(swiss, aes(x = Education, y = Examination)) +
  geom_point() +
  facet_wrap(facets = ~Religion)
# faceted scatter plot with a loess smoother on top of the points
ggplot(swiss, aes(x = Education, y = Examination)) +
  geom_point() +
  geom_smooth(method = "loess") +
  facet_wrap(facets = ~Religion)

# same plot, but with a linear-model ("lm") smoother instead of loess
ggplot(swiss, aes(x = Education, y = Examination)) +
  geom_point() +
  geom_smooth(method = "lm") +
  facet_wrap(facets = ~Religion)
# map point color to Agriculture; the mapping is local to the point layer,
# so the smoother is not affected by it
ggplot(swiss, aes(x = Education, y = Examination)) +
  geom_point(mapping = aes(color = Agriculture)) +
  geom_smooth(method = "lm") +
  facet_wrap(facets = ~Religion)

# same plot with coord_flip() added
ggplot(swiss, aes(x = Education, y = Examination)) +
  geom_point(mapping = aes(color = Agriculture)) +
  geom_smooth(method = "lm") +
  facet_wrap(facets = ~Religion) +
  coord_flip()
# manual theme tweaks: legend position set to "bottom", axis text size set to 12
ggplot(swiss, aes(x = Education, y = Examination)) +
  geom_point(mapping = aes(color = Agriculture)) +
  geom_smooth(method = "lm") +
  facet_wrap(facets = ~Religion) +
  theme(
    legend.position = "bottom",
    axis.text = element_text(size = 12)
  )

# same plot using the built-in minimal theme
ggplot(swiss, aes(x = Education, y = Examination)) +
  geom_point(mapping = aes(color = Agriculture)) +
  geom_smooth(method = "lm") +
  facet_wrap(facets = ~Religion) +
  theme_minimal()

# same plot using the built-in dark theme
ggplot(swiss, aes(x = Education, y = Examination)) +
  geom_point(mapping = aes(color = Agriculture)) +
  geom_smooth(method = "lm") +
  facet_wrap(facets = ~Religion) +
  theme_dark()
R Core Team. 2018. R: A Language and Environment for Statistical Computing. Vienna, Austria: R Foundation for Statistical Computing. https://www.R-project.org/.
Sarkar, Deepayan. 2008. Lattice: Multivariate Data Visualization with R. New York: Springer. http://lmdvr.r-forge.r-project.org.
Wickham, Hadley. 2016. Ggplot2: Elegant Graphics for Data Analysis. Springer-Verlag New York. http://ggplot2.org.